import data

store_data <- read.csv("Superstore _data.csv")

load packages

library(ggplot2)
library(tidyverse)
library(plotly)
library(lubridate)

gross profit and revenue :

store_data |>
  mutate(unitPrice = ifelse(Discount > 0,(Sales/Quantity)/(1-Discount),Sales/Quantity),
         subtotal = unitPrice*Quantity,
         costPrice = Sales-Profit,
         markup = 100*Profit/costPrice) |>
  group_by(Category,Sub.Category) |>
  summarise(revenue = sum(subtotal),
            gross_profit = as.integer(sum(revenue)-sum(costPrice)))->gp_category
## `summarise()` has grouped output by 'Category'. You can override using the `.groups` argument.
gp_category |> 
  mutate(grossMargin=(gross_profit/revenue)*100) ->gp_category

revenue (category-based):

ggplotly(ggplot(gp_category,aes(Category,revenue,fill = Sub.Category))+
           geom_bar(stat = "identity")+
           labs(x= "Categories", y = "Revenue",
                title = "Revenue Generated by Each Categories and Sub-Categories")+
           theme(axis.text.x = element_text(angle = 45)))

revenue and gross profit of each sub-categories:

options(scipen = 999)
gp_category |> 
pivot_longer(revenue:gross_profit,
                names_to = "legend",
             values_to = "Amount")->pivot_gp_category

barplot(stacked):

options(scipen = 999)
ggplotly(ggplot(pivot_gp_category,aes(Sub.Category,Amount,fill = legend))
         +geom_bar(stat = "identity")+theme(
           axis.text.x = element_text(angle = 90))+
           labs(x= "Sub-Categories", y = "Amount(Dollars)",
           title = "Revenue and Gross Profit Generated of each Sub-Categories"))

revenue and gross profit categories:

options(scipen = 999)
ggplotly(ggplot(pivot_gp_category,aes(Category,Amount,fill = legend))
         +geom_bar(stat = "identity", position = "dodge")+theme(
           axis.text.x = element_text(angle = 90))+
           labs(x="Categories",y="Amount(Dollars)",
            title = "Revenue and Gross Profit Generated of each Categories"))

no.of sales(category-based):

store_data |>
  group_by(Category,Sub.Category) |>
  count() |>
  ggplot(aes(x = Category, y = n , fill = Sub.Category))+geom_col()+
  theme (axis.text.x = element_text(angle = 45))+
  labs(x="Categories",y="No.of Transactions",
  title = "No.of Transactions of Sub-Categories in Each Category")-> sale_volume
ggplotly(sale_volume)

calculations for segments:

store_data |>
  mutate(unitPrice = ifelse(Discount > "0",(Sales/Quantity)/(1-Discount),Sales/Quantity),
         subtotal = unitPrice*Quantity) |>
  group_by(Customer.ID,Customer.Name,Segment) |>
  summarise(total_revenue = sum(subtotal),
            total_orders = n_distinct(Order.ID),
            total_profit = sum(Profit)) -> customers_based
## `summarise()` has grouped output by 'Customer.ID', 'Customer.Name'. You can override using the `.groups` argument.

customer segments vs profits:

ggplotly(ggplot(customers_based |> filter(total_profit > "0")|>
         group_by(Segment) |>
         summarise(profits = sum(total_profit)),aes(Segment,profits,fill = Segment))+
           geom_col()+labs(x="Names of segments of Customers",y="Profits",
                           title = "Profits In Each segments of Customers"))

segments size:

ggplotly(ggplot(customers_based |>
  group_by(Segment) |> 
  summarise(total_customers =  n(),
            segment_revenue = sum(total_revenue)), aes(total_customers,segment_revenue, fill = Segment))
  +geom_col(width = 200,alpha = 0.65, position = "stack")+
  labs(x="No.of Customers",y="Revenue",title = "Graph of Segment Size"))
## Warning: position_stack requires non-overlapping x intervals

no.of sales vs years (2015-2018):

store_data |>
  mutate(unitPrice = ifelse(Discount > 0,(Sales/Quantity)/(1-Discount),Sales/Quantity),
         subtotal = unitPrice*Quantity,
         order_date = strptime(Order.Date,format = "%m/%d/%Y"),
         ship_date = strptime(Ship.Date,format = "%m/%d/%Y"),
         order_year = lubridate::year(order_date),
         order_month = lubridate::month(order_date,label = TRUE, abbr = FALSE ),
         weekday = weekdays(order_date)) |>
  group_by(order_year,weekday) |> 
  summarise(n_transactions = n()) -> weekdays_trends
## `summarise()` has grouped output by 'order_year'. You can override using the `.groups` argument.
 ggplotly(ggplot(weekdays_trends,aes(n_transactions,weekday, fill = weekday ))+geom_col()+
  facet_wrap(~order_year)+labs(x="No.of Transactions",y = "Week Days",
  title = "Graph of Sales in weekdays in Different Years"))

no.of sales vs months (2015-2018):

store_data |>
  mutate(unitPrice = ifelse(Discount > 0,(Sales/Quantity)/(1-Discount),Sales/Quantity),
         subtotal = unitPrice*Quantity,
         order_date = strptime(Order.Date,format = "%m/%d/%Y"),
         ship_date = strptime(Ship.Date,format = "%m/%d/%Y"),
         order_year = lubridate::year(order_date),
         order_month = lubridate::month(order_date,label = TRUE, abbr = FALSE ),
         weekday = weekdays(order_date)) |>
  group_by(order_year,weekday) |> 
  summarise(n_transactions = n()) -> weekdays_trends
## `summarise()` has grouped output by 'order_year'. You can override using the `.groups` argument.
 ggplotly(ggplot(weekdays_trends,aes(n_transactions,weekday, fill = weekday ))+geom_col()+
  facet_wrap(~order_year)+labs(x="No.of Transactions",y = "Week Days",
  title = "Graph of Sales in weekdays in Different Years"))

#No. of Sales of Categories in Months of Years(2015-2018):

store_data |>
  mutate(unitPrice = ifelse(Discount > 0,(Sales/Quantity)/(1-Discount),Sales/Quantity),
         order_date = strptime(Order.Date,format = "%m/%d/%Y"),
         order_year = lubridate::year(order_date),
         order_month = month(order_date,label = TRUE),
         subtotal = unitPrice*Quantity) |> 
  group_by(Segment,order_year,order_month) |>
  summarise( n_transactions = n()) -> segment_months
## `summarise()` has grouped output by 'Segment', 'order_year'. You can override using the `.groups` argument.
ggplot(segment_months,aes((order_month),Segment,colour= Segment,size = n_transactions ))+
    geom_point(alpha =.6)+theme(axis.text.x = element_text(angle = 90))+
  facet_wrap(~order_year)+labs(x="",y="Segment",title = "Graph of No. of transactions By Each Segment(Monthly)")

country based calculations:

 store_data |>
   mutate(unitPrice = ifelse(Discount > 0,(Sales/Quantity)/(1-Discount),Sales/Quantity),
          subtotal = unitPrice*Quantity) |> 
   group_by(Country,Order.ID) |> 
   summarise(revenue = sum(subtotal)) |>
   group_by(Country) |> 
   summarise(total_revenue = sum(revenue),
             n_transactions = n_distinct(Order.ID),
             avg_order_value = mean(revenue)) -> country_based
## `summarise()` has grouped output by 'Country'. You can override using the `.groups` argument.

total revenue by country:

ggplotly(
   ggplot(country_based,aes(Country,total_revenue,fill = Country))+
            geom_bar(stat = "identity")+theme(axis.text.x = element_text(angle = 45))+
     labs(x="",y="Revenue",title = "Graph of Total Revenue against Each Country"))

avg.order value vs total transactions:

ggplotly(ggplot(country_based,aes(n_transactions,avg_order_value,color = Country))+
   geom_point(alpha = 0.6, size = 3)+
    labs(x="No.of transactions",y="Average Order Value",
   title = "Graph of Average Invoice Value and No. of Sales in Countries"))

date based analysis:

 date_today <- lubridate::ymd("20190130")
 store_data |>
   mutate(order_date = strptime(Order.Date,"%m/%d/%Y"))|>
   group_by(Customer.ID,Region) |> 
   summarise(last_buy = max(order_date)) |>
   mutate(days_lastPurchse = difftime(date_today,last_buy,units = "days"),
          days_lastPurchse = as.integer((days_lastPurchse))) -> region_based
## `summarise()` has grouped output by 'Customer.ID'. You can override using the `.groups` argument.
 region_based |>
   mutate(months_lastPurchase = days_lastPurchse/30,
 months_lastPurchase = as.integer(months_lastPurchase)) -> region_based

 
 ggplotly(ggplot(region_based,aes(Region,months_lastPurchase,
                                  shape = Region,fill = Region))+geom_boxplot()+
            labs(x="",y=" Time  Since Last Sale (months)"))

customer relationship :

store_data |>
   mutate(unitPrice = ifelse(Discount > 0,(Sales/Quantity)/(1-Discount),Sales/Quantity),
          subtotal = unitPrice*Quantity,
          order_date = strptime(Order.Date,"%m/%d/%Y"))|>
   group_by(Customer.ID) |> 
   summarise(revenue = sum(subtotal),
             last_buy = max(order_date),
             first_buy = min(order_date)) |> 
   mutate(days_relation = difftime(last_buy,first_buy, units = "days"),
          months = days_relation/30,
          months = as.integer(months)) -> customer_relation

 ggplotly(ggplot(customer_relation |>
                   filter(months > "0"),aes(months))+
            geom_bar()+
            labs( x="No. of Months", y = "No. of Customers",
            title = "Customer Relation With Bussiness In Months"))